knitr::opts_chunk$set(warning = FALSE, message = FALSE)
library(tidyverse)
library(lsa)
library(readxl)
library(plotly)
library(countrycode)
df <- read_delim('../../data/country_portfolios_dimensions.csv',delim = ';') %>%
rename(N=p) %>%
filter(type !='all') %>%
mutate(type = case_match(type,
'national' ~ 'National',
'international' ~ 'International',
'migrant' ~ 'Mobile'),
country_code =countrycode(country_code, origin = 'iso2c', destination = 'country.name')
)
countries_keep <- df %>% group_by(country_code) %>%
summarise(N=sum(N)) %>%
filter(N>1000) %>%
pull(country_code)
df <- df %>%
filter(country_code %in% countries_keep)
df_clean <- df %>%
group_by(type,for_group_id,country_code) %>%
summarise(N = sum(N)) %>%
ungroup() %>%
mutate(for_group_id = factor(for_group_id),
country_code = factor(country_code)) %>%
complete(type,for_group_id,country_code,fill=list(N=0))
#filter empty topics
empty_topics <- df_clean %>%
group_by(for_group_id,country_code) %>%
summarise(N= sum(N)) %>%
filter(N==0)
df_clean2 <- df_clean %>%
anti_join(empty_topics)
df_clean <- df_clean %>%
group_by(type,country_code) %>%
reframe(for_group_id,
N,
p = N/sum(N))
df_clean2 <- df_clean2 %>%
group_by(type,country_code) %>%
reframe(for_group_id,
N,
p = N/sum(N))
cossims <- function(df){
df %>%
select(-country_code,-N) %>%
pivot_wider(names_from = type,values_from = p) %>%
summarise(cos_IM = cosine(International,Mobile),
cos_IN = cosine(International,National),
cos_MN = cosine(Mobile,National))
}
cosine_sim <- df_clean %>%
group_by(country_code) %>%
cossims()
size versus change
ggplotly(
df %>% group_by(country_code) %>%
summarise(N=sum(N)) %>%
right_join(cosine_sim) %>%
pivot_longer(cols = cos_IM:cos_MN,names_to = 'relation',values_to = 'cosine',names_prefix = 'cos_') %>%
mutate(relation = case_when(relation=='IM' ~'International-Mobile',
relation=='IN' ~'International-National',
relation=='MN' ~'Mobile-National',
)) %>%
filter(country_code!='ZZALL') %>%
ggplot(aes(N, cosine, color=relation, label=country_code)) +
geom_point()+
scale_x_log10()
)
Try to normalize size
compare all papers vs all papers minus
(international/Mobile/National)
cossims_diff <- function(df){
df %>%
select(-country_code,-p) %>%
pivot_wider(id_cols = c(country_code,for_group_id),names_from = type,values_from = N) %>%
mutate(All = International + Mobile + National,
International = All - International,
Mobile = All - Mobile,
National = All - National) %>%
mutate(International = International/sum(International),
Mobile = Mobile/sum(Mobile),
National = National/sum(National)) %>%
summarise(International = cosine(International,All),
National = cosine(National,All),
Mobile = cosine(Mobile,All))
}
cosine_sim_diff <- df_clean %>%
group_by(country_code) %>%
cossims_diff()
ggplotly(
df %>% group_by(country_code) %>%
summarise(N=sum(N)) %>%
right_join(cosine_sim_diff) %>%
pivot_longer(cols = International:Mobile,names_to = 'relation',values_to = 'cosine') %>%
filter(country_code!='ZZALL') %>%
ggplot(aes(N, cosine, color=relation, label=country_code)) +
geom_point()+
scale_x_log10()
)
group sizes
ggplotly(
df %>% group_by(country_code, type) %>%
summarise(N=sum(N)) %>%
right_join(cosine_sim_diff %>% pivot_longer(International:Mobile, names_to='type', values_to='cosine')) %>%
# pivot_longer(cols = International:Mobile,names_to = 'relation',values_to = 'cosine') %>%
filter(country_code!='ZZALL') %>%
group_by(country_code) %>%
mutate(country_N = sum(N)) %>%
ggplot(aes(country_N, cosine, color=type,size=N, label=country_code)) +
geom_point()+
#
scale_x_log10()
)
NA
excluding topics that have all 0’s
cosine_sim_diff2 <- df_clean2 %>%
group_by(country_code) %>%
cossims_diff()
ggplotly(
df %>% group_by(country_code, type) %>%
summarise(N=sum(N)) %>%
right_join(cosine_sim_diff2 %>% pivot_longer(International:Mobile, names_to='type', values_to='cosine')) %>%
# pivot_longer(cols = International:Mobile,names_to = 'relation',values_to = 'cosine') %>%
filter(country_code!='ZZALL') %>%
group_by(country_code) %>%
mutate(country_N = sum(N)) %>%
ggplot(aes(country_N, cosine, color=type,size=N, label=country_code)) +
geom_point()+
scale_x_log10()
)
NA
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKCmBgYHtyIHNldHVwfQprbml0cjo6b3B0c19jaHVuayRzZXQod2FybmluZyA9IEZBTFNFLCBtZXNzYWdlID0gRkFMU0UpCmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KGxzYSkKbGlicmFyeShyZWFkeGwpCmxpYnJhcnkocGxvdGx5KQpsaWJyYXJ5KGNvdW50cnljb2RlKQpgYGAKCgpgYGB7cn0KZGYgPC0gcmVhZF9kZWxpbSgnLi4vLi4vZGF0YS9jb3VudHJ5X3BvcnRmb2xpb3NfZGltZW5zaW9ucy5jc3YnLGRlbGltID0gJzsnKSAlPiUgCiAgcmVuYW1lKE49cCkgJT4lIAogIGZpbHRlcih0eXBlICE9J2FsbCcpICU+JSAKICBtdXRhdGUodHlwZSA9IGNhc2VfbWF0Y2godHlwZSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgJ25hdGlvbmFsJyB+ICdOYXRpb25hbCcsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICdpbnRlcm5hdGlvbmFsJyB+ICdJbnRlcm5hdGlvbmFsJywKICAgICAgICAgICAgICAgICAgICAgICAgICAgJ21pZ3JhbnQnIH4gJ01vYmlsZScpLAogICAgICAgICBjb3VudHJ5X2NvZGUgPWNvdW50cnljb2RlKGNvdW50cnlfY29kZSwgb3JpZ2luID0gJ2lzbzJjJywgZGVzdGluYXRpb24gPSAnY291bnRyeS5uYW1lJykKKQoKYGBgCgpgYGB7cn0KY291bnRyaWVzX2tlZXAgPC0gZGYgJT4lIGdyb3VwX2J5KGNvdW50cnlfY29kZSkgJT4lIAogIHN1bW1hcmlzZShOPXN1bShOKSkgJT4lIAogIGZpbHRlcihOPjEwMDApICU+JSAKICBwdWxsKGNvdW50cnlfY29kZSkKCmRmIDwtIGRmICU+JSAKICBmaWx0ZXIoY291bnRyeV9jb2RlICVpbiUgY291bnRyaWVzX2tlZXApCmBgYAoKYGBge3J9CmRmX2NsZWFuIDwtIGRmICU+JSAKICBncm91cF9ieSh0eXBlLGZvcl9ncm91cF9pZCxjb3VudHJ5X2NvZGUpICU+JSAKICBzdW1tYXJpc2UoTiA9IHN1bShOKSkgJT4lIAogIHVuZ3JvdXAoKSAlPiUgCiAgbXV0YXRlKGZvcl9ncm91cF9pZCA9IGZhY3Rvcihmb3JfZ3JvdXBfaWQpLAogICAgICAgICBjb3VudHJ5X2NvZGUgPSBmYWN0b3IoY291bnRyeV9jb2RlKSkgJT4lIAogICAgIGNvbXBsZXRlKHR5cGUsZm9yX2dyb3VwX2lkLGNvdW50cnlfY29kZSxmaWxsPWxpc3QoTj0wKSkKCiNmaWx0ZXIgZW1wdHkgdG9waWNzCgplbXB0eV90b3BpY3MgPC0gZGZfY2xlYW4gJT4lIAogIGdyb3VwX2J5KGZvcl9ncm91cF9pZCxjb3VudHJ5X2NvZGUpICU+JSAKICBzdW1tYXJpc2UoTj0gc3VtKE4pKSAlPiUgCiAgZmlsdGVyKE49PTApCgpkZl9jbGVhbjIgPC0gZGZfY2xlYW4gJT4lIAogIGFudGlfam9pbihlbXB0eV90b3BpY3MpCmBgYAoKYGBge3J9CmRmX2NsZWFuIDwtIGRmX2NsZWFuICU+JSAKICBncm91cF9ieSh0eXBlLGNvdW50cnlfY29kZSkgJT4lIAogIHJlZnJhbWUoZm9yX2dyb3VwX2lkLAogICAgICAgICAgTiwKICAgICAgICAgIHAgPSBOL3N1bShOKSkKCmRmX2NsZWFuMiA8LSBkZl9jbGVhbjIgJT4lIAogIGdyb3VwX2J5KHR5cGUsY291bnRyeV9jb2RlKSAlPiUgCiAgcmVmcmFtZShmb3JfZ3JvdXBfaWQsCiAgICAgICAgICBOLAogICAgICAgICAgcCA9IE4vc3VtKE4pKQpgYGAKCgpgYGB7cn0KY29zc2ltcyA8LSBmdW5jdGlvbihkZil7CiAgZGYgJT4lIAogIHNlbGVjdCgtY291bnRyeV9jb2RlLC1OKSAlPiUgCiAgcGl2b3Rfd2lkZXIobmFtZXNfZnJvbSA9IHR5cGUsdmFsdWVzX2Zyb20gPSBwKSAlPiUgCiAgc3VtbWFyaXNlKGNvc19JTSA9IGNvc2luZShJbnRlcm5hdGlvbmFsLE1vYmlsZSksCiAgICAgICAgICAgIGNvc19JTiA9IGNvc2luZShJbnRlcm5hdGlvbmFsLE5hdGlvbmFsKSwKICAgICAgICAgICAgY29zX01OID0gY29zaW5lKE1vYmlsZSxOYXRpb25hbCkpCn0KYGBgCgoKYGBge3J9CmNvc2luZV9zaW0gPC0gZGZfY2xlYW4gJT4lIAogIGdyb3VwX2J5KGNvdW50cnlfY29kZSkgJT4lIAogIGNvc3NpbXMoKQpgYGAKCiMjIHNpemUgdmVyc3VzIGNoYW5nZQoKYGBge3IsIGZpZy5oZWlnaHQ9MTIsIGZpZy53aWR0aD0xMn0KZ2dwbG90bHkoCmRmICU+JSBncm91cF9ieShjb3VudHJ5X2NvZGUpICU+JSAKICBzdW1tYXJpc2UoTj1zdW0oTikpICU+JSAKICByaWdodF9qb2luKGNvc2luZV9zaW0pICU+JSAKICBwaXZvdF9sb25nZXIoY29scyA9IGNvc19JTTpjb3NfTU4sbmFtZXNfdG8gPSAncmVsYXRpb24nLHZhbHVlc190byA9ICdjb3NpbmUnLG5hbWVzX3ByZWZpeCA9ICdjb3NfJykgJT4lIAogIG11dGF0ZShyZWxhdGlvbiA9IGNhc2Vfd2hlbihyZWxhdGlvbj09J0lNJyB+J0ludGVybmF0aW9uYWwtTW9iaWxlJywKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgcmVsYXRpb249PSdJTicgfidJbnRlcm5hdGlvbmFsLU5hdGlvbmFsJywKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgcmVsYXRpb249PSdNTicgfidNb2JpbGUtTmF0aW9uYWwnLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICApKSAlPiUgCiAgZmlsdGVyKGNvdW50cnlfY29kZSE9J1paQUxMJykgJT4lIAogIGdncGxvdChhZXMoTiwgY29zaW5lLCBjb2xvcj1yZWxhdGlvbiwgbGFiZWw9Y291bnRyeV9jb2RlKSkgKwogIGdlb21fcG9pbnQoKSsKICAKICBzY2FsZV94X2xvZzEwKCkKKQpgYGAKCiMjIFRyeSB0byBub3JtYWxpemUgc2l6ZQojIyBjb21wYXJlIGFsbCBwYXBlcnMgdnMgYWxsIHBhcGVycyBtaW51cyAoaW50ZXJuYXRpb25hbC9Nb2JpbGUvTmF0aW9uYWwpCgoKYGBge3J9CmNvc3NpbXNfZGlmZiA8LSBmdW5jdGlvbihkZil7CiAgZGYgJT4lCiAgc2VsZWN0KC1jb3VudHJ5X2NvZGUsLXApICU+JSAKICBwaXZvdF93aWRlcihpZF9jb2xzID0gYyhjb3VudHJ5X2NvZGUsZm9yX2dyb3VwX2lkKSxuYW1lc19mcm9tID0gdHlwZSx2YWx1ZXNfZnJvbSA9IE4pICU+JSAKICBtdXRhdGUoQWxsID0gSW50ZXJuYXRpb25hbCArIE1vYmlsZSArIE5hdGlvbmFsLAogICAgICAgICBJbnRlcm5hdGlvbmFsID0gQWxsIC0gSW50ZXJuYXRpb25hbCwKICAgICAgICAgTW9iaWxlID0gQWxsIC0gTW9iaWxlLAogICAgICAgICBOYXRpb25hbCA9IEFsbCAtIE5hdGlvbmFsKSAlPiUgCiAgbXV0YXRlKEludGVybmF0aW9uYWwgPSBJbnRlcm5hdGlvbmFsL3N1bShJbnRlcm5hdGlvbmFsKSwKICAgICAgICAgTW9iaWxlID0gTW9iaWxlL3N1bShNb2JpbGUpLAogICAgICAgICBOYXRpb25hbCA9IE5hdGlvbmFsL3N1bShOYXRpb25hbCkpICU+JSAKICBzdW1tYXJpc2UoSW50ZXJuYXRpb25hbCA9IGNvc2luZShJbnRlcm5hdGlvbmFsLEFsbCksCiAgICAgICAgICAgIE5hdGlvbmFsID0gY29zaW5lKE5hdGlvbmFsLEFsbCksCiAgICAgICAgICAgIE1vYmlsZSA9IGNvc2luZShNb2JpbGUsQWxsKSkKfQpgYGAKCmBgYHtyfQpjb3NpbmVfc2ltX2RpZmYgPC0gZGZfY2xlYW4gJT4lIAogIGdyb3VwX2J5KGNvdW50cnlfY29kZSkgJT4lIAogIGNvc3NpbXNfZGlmZigpCmBgYAoKYGBge3J9CmdncGxvdGx5KApkZiAlPiUgZ3JvdXBfYnkoY291bnRyeV9jb2RlKSAlPiUgCiAgc3VtbWFyaXNlKE49c3VtKE4pKSAlPiUgCiAgcmlnaHRfam9pbihjb3NpbmVfc2ltX2RpZmYpICU+JSAKICBwaXZvdF9sb25nZXIoY29scyA9IEludGVybmF0aW9uYWw6TW9iaWxlLG5hbWVzX3RvID0gJ3JlbGF0aW9uJyx2YWx1ZXNfdG8gPSAnY29zaW5lJykgJT4lIAogIGZpbHRlcihjb3VudHJ5X2NvZGUhPSdaWkFMTCcpICU+JSAKICBnZ3Bsb3QoYWVzKE4sIGNvc2luZSwgY29sb3I9cmVsYXRpb24sIGxhYmVsPWNvdW50cnlfY29kZSkpICsKICBnZW9tX3BvaW50KCkrCiAgCiAgc2NhbGVfeF9sb2cxMCgpCikKYGBgCgoKIyMgZ3JvdXAgc2l6ZXMKCmBgYHtyfQpnZ3Bsb3RseSgKZGYgJT4lIGdyb3VwX2J5KGNvdW50cnlfY29kZSwgdHlwZSkgJT4lIAogIHN1bW1hcmlzZShOPXN1bShOKSkgJT4lIAogIHJpZ2h0X2pvaW4oY29zaW5lX3NpbV9kaWZmICU+JSBwaXZvdF9sb25nZXIoSW50ZXJuYXRpb25hbDpNb2JpbGUsIG5hbWVzX3RvPSd0eXBlJywgdmFsdWVzX3RvPSdjb3NpbmUnKSkgJT4lIAogICMgcGl2b3RfbG9uZ2VyKGNvbHMgPSBJbnRlcm5hdGlvbmFsOk1vYmlsZSxuYW1lc190byA9ICdyZWxhdGlvbicsdmFsdWVzX3RvID0gJ2Nvc2luZScpICU+JSAKICBmaWx0ZXIoY291bnRyeV9jb2RlIT0nWlpBTEwnKSAlPiUgCiAgZ3JvdXBfYnkoY291bnRyeV9jb2RlKSAlPiUgCiAgbXV0YXRlKGNvdW50cnlfTiA9IHN1bShOKSkgJT4lIAogIGdncGxvdChhZXMoY291bnRyeV9OLCBjb3NpbmUsIGNvbG9yPXR5cGUsc2l6ZT1OLCBsYWJlbD1jb3VudHJ5X2NvZGUpKSArCiAgZ2VvbV9wb2ludCgpKwogICMgCiAgc2NhbGVfeF9sb2cxMCgpCikKCmBgYAoKZXhjbHVkaW5nIHRvcGljcyB0aGF0IGhhdmUgYWxsIDAncyAKYGBge3J9Cgpjb3NpbmVfc2ltX2RpZmYyIDwtIGRmX2NsZWFuMiAlPiUgCiAgZ3JvdXBfYnkoY291bnRyeV9jb2RlKSAlPiUgCiAgY29zc2ltc19kaWZmKCkKCmdncGxvdGx5KApkZiAlPiUgZ3JvdXBfYnkoY291bnRyeV9jb2RlLCB0eXBlKSAlPiUgCiAgc3VtbWFyaXNlKE49c3VtKE4pKSAlPiUgCiAgcmlnaHRfam9pbihjb3NpbmVfc2ltX2RpZmYyICU+JSBwaXZvdF9sb25nZXIoSW50ZXJuYXRpb25hbDpNb2JpbGUsIG5hbWVzX3RvPSd0eXBlJywgdmFsdWVzX3RvPSdjb3NpbmUnKSkgJT4lIAogICMgcGl2b3RfbG9uZ2VyKGNvbHMgPSBJbnRlcm5hdGlvbmFsOk1vYmlsZSxuYW1lc190byA9ICdyZWxhdGlvbicsdmFsdWVzX3RvID0gJ2Nvc2luZScpICU+JSAKICBmaWx0ZXIoY291bnRyeV9jb2RlIT0nWlpBTEwnKSAlPiUgCiAgZ3JvdXBfYnkoY291bnRyeV9jb2RlKSAlPiUgCiAgbXV0YXRlKGNvdW50cnlfTiA9IHN1bShOKSkgJT4lIAogIGdncGxvdChhZXMoY291bnRyeV9OLCBjb3NpbmUsIGNvbG9yPXR5cGUsc2l6ZT1OLCBsYWJlbD1jb3VudHJ5X2NvZGUpKSArCiAgZ2VvbV9wb2ludCgpKwogIAogIHNjYWxlX3hfbG9nMTAoKQopCgpgYGA=